
# PROJECT DATASETS/INSPIRATION FOUND AT https://github.com/KeithGalli/Pandas-Data-Science-Tasks/tree/master/SalesAnalysis/Sales_Data
# IMPORTING LIBRARIES/DATASETS
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None
# ADDING 'pd.options.mode.chained_assignment = None' REMOVES THE 'SettingWithCopyWarning' THAT WILL
# SHOW WHEN RUNNING CODE LATER ON IN THE PROJECT
import plotly.express as px
import plotly.graph_objects as go
jan_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_January_2019.csv')
feb_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_February_2019.csv')
mar_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_March_2019.csv')
apr_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_April_2019.csv')
may_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_May_2019.csv')
june_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_June_2019.csv')
july_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_July_2019.csv')
aug_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_August_2019.csv')
sept_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_September_2019.csv')
oct_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_October_2019.csv')
nov_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_November_2019.csv')
dec_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_December_2019.csv')
# ALL DATASETS WERE CLEANED OF NULL VALUES IN EXCEL PRIOR TO IMPORT
# OBSERVING THE 'jan_sales_2019' DATAFRAME
jan_sales_2019
| Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address | |
|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 |
| ... | ... | ... | ... | ... | ... | ... |
| 9676 | 150497 | 20in Monitor | 1 | 109.99 | 1/26/2019 19:09 | 95 8th St, Dallas, TX 75001 |
| 9677 | 150498 | 27in FHD Monitor | 1 | 149.99 | 1/10/2019 22:58 | 403 7th St, San Francisco, CA 94016 |
| 9678 | 150499 | ThinkPad Laptop | 1 | 999.99 | 1/21/2019 14:31 | 214 Main St, Portland, OR 97035 |
| 9679 | 150500 | AAA Batteries (4-pack) | 2 | 2.99 | 1/15/2019 14:21 | 810 2nd St, Los Angeles, CA 90001 |
| 9680 | 150501 | Google Phone | 1 | 600.00 | 1/13/2019 16:43 | 428 Cedar St, Boston, MA 02215 |
9681 rows × 6 columns
# CREATING A LIST OF ALL THE MONTHS (EXCLUDING JANUARY) IN ORDER TO APPEND AND CREATE ONE SALES DATAFRAME FOR 2019
sales_list = [feb_sales_2019,mar_sales_2019,apr_sales_2019,may_sales_2019,june_sales_2019,july_sales_2019,aug_sales_2019,sept_sales_2019,oct_sales_2019,nov_sales_2019,dec_sales_2019]
# APPENDING THE 'sales_list' DATAFRAME TO 'jan_sales_2019' TO CREATE ONE SALES DATAFRAME FOR 2019 NAMED 'sales_2019'
sales_2019 = jan_sales_2019.append(sales_list, sort=False)
# CHECKING THE 'sales_2019' DATAFRAME
sales_2019
| Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address | |
|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 |
| ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 |
185950 rows × 6 columns
# OBSERVING THE DATA TYPES OF THE COLUMNS IN THE DATAFRAME
sales_2019.dtypes
Order ID int64 Product object Quantity Ordered int64 Price Each float64 Order Date object Purchase Address object dtype: object
# CREATING A 'Customer ID' column based on the unique values in the 'Purchase Address' COLUMN
sales_2019['Customer ID'] = sales_2019.groupby('Purchase Address').ngroup()+100000
# CHECKING THE 'Customer ID' column in the 'sales_2019' DATAFRAME
sales_2019
| Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address | Customer ID | |
|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 | 106487 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 | 170444 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 | 127410 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 | 206162 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 | 201282 |
185950 rows × 7 columns
# CHECKING THE NUMBER OF UNIQUE CUSTOMER ID's IN THE 'sales_2019' DATAFRAME
sales_2019['Customer ID'].nunique()
140787
# CHANGING THE COLUMN NAMES IN THE DATAFRAME
new_column_list = ['order_id','product','qty_ordered','price_each','order_date','purchase_address','customer_id']
sales_2019.columns = new_column_list
# OBSERVING THE COLUMN NAME CHANGE
sales_2019
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | |
|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 | 106487 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 | 170444 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 | 127410 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 | 206162 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 | 201282 |
185950 rows × 7 columns
# CREATING A COLUMN THAT SHOWS THE TOTAL ORDER AMOUNT FOR EACH ORDER (NAMING THE column 'order_total')
sales_2019['order_total'] = sales_2019['qty_ordered'] * sales_2019['price_each']
# OBSERVING THE 'order_total' COLUMN IN THE 'sales_2019' DATAFRAME
sales_2019.head(10)
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | |
|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 |
| 5 | 141239 | AAA Batteries (4-pack) | 1 | 2.99 | 1/29/2019 20:22 | 775 Willow St, San Francisco, CA 94016 | 205768 | 2.99 |
| 6 | 141240 | 27in 4K Gaming Monitor | 1 | 389.99 | 1/26/2019 12:16 | 979 Park St, Los Angeles, CA 90001 | 237633 | 389.99 |
| 7 | 141241 | USB-C Charging Cable | 1 | 11.95 | 1/5/2019 12:04 | 181 6th St, San Francisco, CA 94016 | 112853 | 11.95 |
| 8 | 141242 | Bose SoundSport Headphones | 1 | 99.99 | 1/1/2019 10:30 | 867 Willow St, Los Angeles, CA 90001 | 220222 | 99.99 |
| 9 | 141243 | Apple Airpods Headphones | 1 | 150.00 | 1/22/2019 21:20 | 657 Johnson St, San Francisco, CA 94016 | 187333 | 150.00 |
# NOW TO SPLIT THE 'purchase_address' TO CREATE 'address', 'city', 'state', 'zip_code' COLUMNS INTO THE 'sales_2019' DATAFRAME
# NOW TO SPLIT THE ADDRESS
sales_2019['address'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[0])
sales_2019['city'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[1])
sales_2019['state'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[2].split(' ')[1])
sales_2019['zip_code'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[2].split(' ')[2])
sales_2019
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 | 106487 | 14.95 | 14 Madison St | San Francisco | CA | 94016 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 | 170444 | 7.68 | 549 Willow St | Los Angeles | CA | 90001 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 | 127410 | 400.00 | 273 Wilson St | Seattle | WA | 98101 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 | 206162 | 11.99 | 778 River St | Dallas | TX | 75001 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 | 201282 | 99.99 | 747 Chestnut St | Los Angeles | CA | 90001 |
185950 rows × 12 columns
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
# CHANGING THE 'order_date' TO DATE TO REMOVE THE TIMESTAMP
sales_2019['order_date'] = pd.to_datetime(sales_2019['order_date']).dt.date
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 2019-01-22 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 2019-01-28 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 2019-01-17 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 2019-01-05 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 2019-01-25 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
sales_2019['order_date'].dtypes
dtype('O')
# CHANGING THE 'order_date' COLUMN TO THE 'datetime64[ns]' DATA TYPE
sales_2019['order_date'] = sales_2019['order_date'].astype('datetime64[ns]')
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 2019-01-22 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 2019-01-28 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 2019-01-17 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 2019-01-05 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 2019-01-25 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
# CREATING A 'month' AND 'day' COLUMN IN THE 'sales_2019' DATAFRAME
sales_2019['month'] = sales_2019['order_date'].dt.strftime('%m')
sales_2019['day'] = sales_2019['order_date'].dt.strftime('%d')
# OBSERVING THE ADDITION OF THE 'month' AND 'day' COLUMNS TO THE 'sales_2019' DATAFRAME
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | month | day | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 2019-01-22 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 | 01 | 22 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 2019-01-28 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 | 01 | 28 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 2019-01-17 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 | 01 | 17 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 2019-01-05 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 | 01 | 05 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 2019-01-25 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 | 01 | 25 |
sales_2019['month'].dtype
dtype('O')
# CHANGING THE ORDER OF THE COLUMNS IN THE DATAFRAME
sales_2019 = sales_2019[['order_id','customer_id','order_date','month','day','product','qty_ordered','price_each','order_total','purchase_address','address','city','state','zip_code']]
# OBSERVING THE NEW COLUMN ORDER IN THE 'sales_2019' DATAFRAME
sales_2019
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | 232266 | 2019-01-22 | 01 | 22 | iPhone | 1 | 700.00 | 700.00 | 944 Walnut St, Boston, MA 02215 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | 113519 | 2019-01-28 | 01 | 28 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 185 Maple St, Portland, OR 97035 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | 168669 | 2019-01-17 | 01 | 17 | Wired Headphones | 2 | 11.99 | 23.98 | 538 Adams St, San Francisco, CA 94016 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 199824 | 2019-01-05 | 01 | 05 | 27in FHD Monitor | 1 | 149.99 | 149.99 | 738 10th St, Los Angeles, CA 90001 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | 144894 | 2019-01-25 | 01 | 25 | Wired Headphones | 1 | 11.99 | 11.99 | 387 10th St, Austin, TX 73301 | 387 10th St | Austin | TX | 73301 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | 106487 | 2019-12-11 | 12 | 11 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 14 Madison St, San Francisco, CA 94016 | 14 Madison St | San Francisco | CA | 94016 |
| 24985 | 319667 | 170444 | 2019-12-01 | 12 | 01 | AA Batteries (4-pack) | 2 | 3.84 | 7.68 | 549 Willow St, Los Angeles, CA 90001 | 549 Willow St | Los Angeles | CA | 90001 |
| 24986 | 319668 | 127410 | 2019-12-09 | 12 | 09 | Vareebadd Phone | 1 | 400.00 | 400.00 | 273 Wilson St, Seattle, WA 98101 | 273 Wilson St | Seattle | WA | 98101 |
| 24987 | 319669 | 206162 | 2019-12-03 | 12 | 03 | Wired Headphones | 1 | 11.99 | 11.99 | 778 River St, Dallas, TX 75001 | 778 River St | Dallas | TX | 75001 |
| 24988 | 319670 | 201282 | 2019-12-21 | 12 | 21 | Bose SoundSport Headphones | 1 | 99.99 | 99.99 | 747 Chestnut St, Los Angeles, CA 90001 | 747 Chestnut St | Los Angeles | CA | 90001 |
185950 rows × 14 columns
# OBSERVING THE DIFFERENT CITY/STATE COMBINATIONS AND THE NUBMER OF TIMES THEY APPEAR IN THE DATAFRAME
sales_2019[['city','state']].value_counts()
city state San Francisco CA 44732 Los Angeles CA 29605 New York City NY 24876 Boston MA 19934 Atlanta GA 14881 Dallas TX 14820 Seattle WA 14732 Portland OR 10010 Austin TX 9905 Portland ME 2455 dtype: int64
# FINDING THE SALES TOTAL FOR EACH MONTH
monthly_sales = sales_2019.groupby('month')[['order_total']].agg('sum').reset_index()
monthly_sales
| month | order_total | |
|---|---|---|
| 0 | 01 | 1822256.73 |
| 1 | 02 | 2202022.42 |
| 2 | 03 | 2807100.38 |
| 3 | 04 | 3390670.24 |
| 4 | 05 | 3152606.75 |
| 5 | 06 | 2577802.26 |
| 6 | 07 | 2647775.76 |
| 7 | 08 | 2244467.88 |
| 8 | 09 | 2097560.13 |
| 9 | 10 | 3736726.88 |
| 10 | 11 | 3199603.20 |
| 11 | 12 | 4613443.34 |
# NOW TO VISUALIZE
monthly_sales_graph = px.line(monthly_sales,x='month',y='order_total',title='Sales by Month',labels={'month':'Month','order_total':'Sales($)'})
monthly_sales_graph.show()
# FINDING THE SALES TOTAL OF EACH PRODUCT FOR EACH MONTH
monthly_product_sales = sales_2019.groupby(['product','month'])[['order_total']].agg('sum').reset_index()
monthly_product_sales
| product | month | order_total | |
|---|---|---|---|
| 0 | 20in Monitor | 01 | 23977.82 |
| 1 | 20in Monitor | 02 | 27057.54 |
| 2 | 20in Monitor | 03 | 35856.74 |
| 3 | 20in Monitor | 04 | 43226.07 |
| 4 | 20in Monitor | 05 | 37506.59 |
| ... | ... | ... | ... |
| 223 | iPhone | 08 | 307300.00 |
| 224 | iPhone | 09 | 277900.00 |
| 225 | iPhone | 10 | 501200.00 |
| 226 | iPhone | 11 | 465500.00 |
| 227 | iPhone | 12 | 634200.00 |
228 rows × 3 columns
# NOW TO VISUALIZE
monthly_product_sales_graph = px.line(monthly_product_sales,x='month',y='order_total',color='product',title='Monthly Product Sales',labels={'month':'Month','order_total':'Sales($)','product':'Product'},height=800)
monthly_product_sales_graph.show()
# FINDING THE SALES TOTALS FOR EACH STATE AND VISUALIZE
annual_state_totals = sales_2019.groupby('state')[['order_total']].agg('sum').sort_values('order_total', ascending=False).reset_index()
annual_state_totals
| state | order_total | |
|---|---|---|
| 0 | CA | 13714774.71 |
| 1 | NY | 4664317.43 |
| 2 | TX | 4587557.15 |
| 3 | MA | 3661642.01 |
| 4 | GA | 2795498.58 |
| 5 | WA | 2747755.48 |
| 6 | OR | 1870732.34 |
| 7 | ME | 449758.27 |
# VISUALIZING STATE SALES
state_sales = px.bar(annual_state_totals,x='state',y='order_total',title='2019 Sales by State',labels={'state':'','order_total':'Sales($)'})
state_sales.show()
# FINDING THE SALES TOTALS FOR EACH CITY
city_sales = sales_2019.groupby(['city','state'])[['order_total']].agg('sum').sort_values('order_total',ascending=False).reset_index()
city_sales
| city | state | order_total | |
|---|---|---|---|
| 0 | San Francisco | CA | 8262203.91 |
| 1 | Los Angeles | CA | 5452570.80 |
| 2 | New York City | NY | 4664317.43 |
| 3 | Boston | MA | 3661642.01 |
| 4 | Atlanta | GA | 2795498.58 |
| 5 | Dallas | TX | 2767975.40 |
| 6 | Seattle | WA | 2747755.48 |
| 7 | Portland | OR | 1870732.34 |
| 8 | Austin | TX | 1819581.75 |
| 9 | Portland | ME | 449758.27 |
# MODIFYING THE 'city_sales' DATAFRAME TO INCLUDE THE STATE WITH THE CITY, SINCE THERE ARE TWO 'Portlands' IN THE DATASET
# 'Portland, OR' AND 'Portland, ME'
city_sales['city'] = city_sales['city'] + '(' + city_sales['state'] + ')'
city_sales
| city | state | order_total | |
|---|---|---|---|
| 0 | San Francisco(CA) | CA | 8262203.91 |
| 1 | Los Angeles(CA) | CA | 5452570.80 |
| 2 | New York City(NY) | NY | 4664317.43 |
| 3 | Boston(MA) | MA | 3661642.01 |
| 4 | Atlanta(GA) | GA | 2795498.58 |
| 5 | Dallas(TX) | TX | 2767975.40 |
| 6 | Seattle(WA) | WA | 2747755.48 |
| 7 | Portland(OR) | OR | 1870732.34 |
| 8 | Austin(TX) | TX | 1819581.75 |
| 9 | Portland(ME) | ME | 449758.27 |
# NOW TO VISUALIZE
city_sales_graph = px.bar(city_sales,x='state',y='order_total',color='city',title='State Sales with City Breakdown',labels={'order_total':'Sales($)','state':'','city':'City'})
city_sales_graph.show()
city_sales_graph2 = px.sunburst(city_sales,path=['state','city'],values='order_total',width=800,height=800)
city_sales_graph2.show()
# FINDING THE BREAKDOWN OF SALES TOTALS FOR EACH STATE PER MONTH
monthly_state_totals = sales_2019.groupby(['state','month'])[['order_total']].agg('sum').reset_index()
monthly_state_totals
| state | month | order_total | |
|---|---|---|---|
| 0 | CA | 01 | 724190.23 |
| 1 | CA | 02 | 889134.00 |
| 2 | CA | 03 | 1123656.33 |
| 3 | CA | 04 | 1362690.21 |
| 4 | CA | 05 | 1276368.70 |
| ... | ... | ... | ... |
| 91 | WA | 08 | 189715.09 |
| 92 | WA | 09 | 156369.27 |
| 93 | WA | 10 | 316856.26 |
| 94 | WA | 11 | 252080.13 |
| 95 | WA | 12 | 387317.93 |
96 rows × 3 columns
# NOW TO VISUALIZE
graph_monthly_state_totals = px.area(monthly_state_totals,x='month',y='order_total',color='state',title="2019 Monthly Sales by State",labels={'month':'Month','order_total':'Sales($)','state':'State'},height=800)
graph_monthly_state_totals.show()
# FINDING THE TOTAL NUMBER OF EACH PRODUCT ORDERED PER MONTH
monthly_products_ordered = sales_2019.groupby(['product','month'])[['qty_ordered']].agg('sum').reset_index()
monthly_products_ordered
| product | month | qty_ordered | |
|---|---|---|---|
| 0 | 20in Monitor | 01 | 218 |
| 1 | 20in Monitor | 02 | 246 |
| 2 | 20in Monitor | 03 | 326 |
| 3 | 20in Monitor | 04 | 393 |
| 4 | 20in Monitor | 05 | 341 |
| ... | ... | ... | ... |
| 223 | iPhone | 08 | 439 |
| 224 | iPhone | 09 | 397 |
| 225 | iPhone | 10 | 716 |
| 226 | iPhone | 11 | 665 |
| 227 | iPhone | 12 | 906 |
228 rows × 3 columns
# NOW TO VISUALIZE
monthly_products_ordered_graph = px.scatter(monthly_products_ordered,x='month',y='qty_ordered',color='product',title="Total Products Ordered by Month",labels={'month':'Month','qty_ordered':'Number of Products Ordered','product':'Product'},size='qty_ordered',hover_name='product',color_discrete_sequence=px.colors.sequential.Rainbow,height=600)
monthly_products_ordered_graph.show()
# FINDING THE TOTAL NUMBER OF PRODUCTS ORDERED FOR 2019
products_ordered = sales_2019.groupby('product')[['qty_ordered']].agg('sum').sort_values(by='qty_ordered',ascending=False).reset_index()
products_ordered
| product | qty_ordered | |
|---|---|---|
| 0 | AAA Batteries (4-pack) | 31017 |
| 1 | AA Batteries (4-pack) | 27635 |
| 2 | USB-C Charging Cable | 23975 |
| 3 | Lightning Charging Cable | 23217 |
| 4 | Wired Headphones | 20557 |
| 5 | Apple Airpods Headphones | 15661 |
| 6 | Bose SoundSport Headphones | 13457 |
| 7 | 27in FHD Monitor | 7550 |
| 8 | iPhone | 6849 |
| 9 | 27in 4K Gaming Monitor | 6244 |
| 10 | 34in Ultrawide Monitor | 6199 |
| 11 | Google Phone | 5532 |
| 12 | Flatscreen TV | 4819 |
| 13 | Macbook Pro Laptop | 4728 |
| 14 | ThinkPad Laptop | 4130 |
| 15 | 20in Monitor | 4129 |
| 16 | Vareebadd Phone | 2068 |
| 17 | LG Washing Machine | 666 |
| 18 | LG Dryer | 646 |
# NOW TO VISUALIZE
total_products_ordered = px.bar(products_ordered, x='product', y='qty_ordered', title="Products Ordered in 2019", labels={'product':'','qty_ordered':'Number of Products Ordered'})
total_products_ordered.show()
# TOTAL NUMBER OF ORDERS PER MONTH
total_monthly_orders = sales_2019.groupby('month')[['order_id']].agg('count').reset_index()
total_monthly_orders
| month | order_id | |
|---|---|---|
| 0 | 01 | 9709 |
| 1 | 02 | 11975 |
| 2 | 03 | 15153 |
| 3 | 04 | 18279 |
| 4 | 05 | 16566 |
| 5 | 06 | 13554 |
| 6 | 07 | 14293 |
| 7 | 08 | 11961 |
| 8 | 09 | 11621 |
| 9 | 10 | 20282 |
| 10 | 11 | 17573 |
| 11 | 12 | 24984 |
monthly_orders = px.bar(total_monthly_orders,x='month',y='order_id',title="Total Monthly Orders",labels={'month':'Month','order_id':'Orders'})
monthly_orders.show()
# BREAKDOWN OF TOTAL MONTHLY ORDERS BY STATE
monthly_order_count = sales_2019.groupby(['month','state'])[['order_id']].agg('count').reset_index()
monthly_order_count
| month | state | order_id | |
|---|---|---|---|
| 0 | 01 | CA | 3895 |
| 1 | 01 | GA | 785 |
| 2 | 01 | MA | 1051 |
| 3 | 01 | ME | 123 |
| 4 | 01 | NY | 1313 |
| ... | ... | ... | ... |
| 91 | 12 | ME | 315 |
| 92 | 12 | NY | 3402 |
| 93 | 12 | OR | 1332 |
| 94 | 12 | TX | 3355 |
| 95 | 12 | WA | 1978 |
96 rows × 3 columns
monthly_state_orders = px.bar(monthly_order_count,x='month',y='order_id',color='state',title='Total Monthly Orders by State',labels={'month':'Month','order_id':'Orders','state':'State'})
monthly_state_orders.show()
# LOCATING ORDERS THAT HAVE MULTIPLE ITEMS (PRODUCTS)
## CUSTOMERS ORDERING MORE THAN ONE ITEM
multiple_item_orders = sales_2019[sales_2019['order_id'].duplicated(keep=False)]
multiple_item_orders
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 42 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | 1 | 11.99 | 11.99 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 58 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | 3 | 3.84 | 11.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | 1 | 400.00 | 400.00 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24900 | 319584 | 168610 | 2019-12-11 | 12 | 11 | Wired Headphones | 1 | 11.99 | 11.99 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 |
| 24912 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone | 1 | 700.00 | 700.00 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 24913 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 24948 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor | 1 | 379.99 | 379.99 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
| 24949 | 319631 | 141320 | 2019-12-17 | 12 | 17 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
14649 rows × 14 columns
# ADDING NEW COLUMNS TO THE 'multiple_item_orders' DATAFRAME TO GROUP ALL PRODUCTS FOR A PARTICULAR ORDER ID TOGETHER,
# NAMING IT ('all_products'), TOTALING THE QUANTITY OF PRODUCTS ('total_qty_ordered') AND TO SUM ORDER TOTALS ('complete_order_total')
multiple_item_orders['all_products'] = multiple_item_orders.groupby('order_id')['product'].transform(lambda x: '/'.join(x))
multiple_item_orders['total_qty_ordered'] = multiple_item_orders.groupby('order_id')['qty_ordered'].transform(sum)
multiple_item_orders['complete_order_total'] = multiple_item_orders.groupby('order_id')['order_total'].transform(sum)
multiple_item_orders
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | all_products | total_qty_ordered | complete_order_total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 |
| 42 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | 1 | 11.99 | 11.99 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 | Apple Airpods Headphones/AA Batteries (4-pack) | 4 | 161.52 |
| 58 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | 3 | 3.84 | 11.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 | Apple Airpods Headphones/AA Batteries (4-pack) | 4 | 161.52 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | 1 | 400.00 | 400.00 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 | Vareebadd Phone/Wired Headphones | 2 | 411.99 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24900 | 319584 | 168610 | 2019-12-11 | 12 | 11 | Wired Headphones | 1 | 11.99 | 11.99 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 | iPhone/Wired Headphones | 2 | 711.99 |
| 24912 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone | 1 | 700.00 | 700.00 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 | iPhone/Lightning Charging Cable | 2 | 714.95 |
| 24913 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 | iPhone/Lightning Charging Cable | 2 | 714.95 |
| 24948 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor | 1 | 379.99 | 379.99 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 | 34in Ultrawide Monitor/Lightning Charging Cable | 2 | 394.94 |
| 24949 | 319631 | 141320 | 2019-12-17 | 12 | 17 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 | 34in Ultrawide Monitor/Lightning Charging Cable | 2 | 394.94 |
14649 rows × 17 columns
# CHANGING THE ORDER OF THE COLUMNS IN THE 'multiple_item_orders' DATAFRAME
# MOVING THE NEWLY CREATED COLUMNS AHEAD OF THE ADDRESS COLUMNS
multiple_item_orders = multiple_item_orders[['order_id','customer_id','order_date','month','day','product','all_products','qty_ordered','total_qty_ordered','price_each','order_total','complete_order_total','purchase_address','address','city','state','zip_code']]
multiple_item_orders
| order_id | customer_id | order_date | month | day | product | all_products | qty_ordered | total_qty_ordered | price_each | order_total | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | USB-C Charging Cable/Wired Headphones | 1 | 2 | 11.95 | 11.95 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 42 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | USB-C Charging Cable/Wired Headphones | 1 | 2 | 11.99 | 11.99 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | Apple Airpods Headphones/AA Batteries (4-pack) | 1 | 4 | 150.00 | 150.00 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 58 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | Apple Airpods Headphones/AA Batteries (4-pack) | 3 | 4 | 3.84 | 11.52 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | Vareebadd Phone/Wired Headphones | 1 | 2 | 400.00 | 400.00 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24900 | 319584 | 168610 | 2019-12-11 | 12 | 11 | Wired Headphones | iPhone/Wired Headphones | 1 | 2 | 11.99 | 11.99 | 711.99 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 |
| 24912 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone | iPhone/Lightning Charging Cable | 1 | 2 | 700.00 | 700.00 | 714.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 24913 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable | iPhone/Lightning Charging Cable | 1 | 2 | 14.95 | 14.95 | 714.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 24948 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor | 34in Ultrawide Monitor/Lightning Charging Cable | 1 | 2 | 379.99 | 379.99 | 394.94 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
| 24949 | 319631 | 141320 | 2019-12-17 | 12 | 17 | Lightning Charging Cable | 34in Ultrawide Monitor/Lightning Charging Cable | 1 | 2 | 14.95 | 14.95 | 394.94 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
14649 rows × 17 columns
# NOW THAT THE 'multiple_item_orders' DATAFRAME HAS COLUMNS THAT COMBINE ALL PRODUCTS ORDERED PER ORDER ID ALONG WITH
# QUANTITY ORDERED AND COMPLETE ORDER TOTALS, I WILL CLEAN UP THE 'multiple_item_orders' DATAFRAME BY REMOVING THE
# 'product','qty_ordered','price_each','order_total' COLUMNS (AS THEY ARE NO LONGER NECESSARY)
# AND DROPPING THE DUPLICATE ROWS BY ORDER ID
multiple_item_orders = multiple_item_orders.drop_duplicates(subset='order_id')
multiple_item_orders = multiple_item_orders.drop(columns=['product','qty_ordered','price_each','order_total'])
multiple_item_orders
| order_id | customer_id | order_date | month | day | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones/AA Batteries (4-pack) | 4 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone/Wired Headphones | 2 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 153 | 141384 | 119608 | 2019-01-03 | 01 | 03 | Google Phone/USB-C Charging Cable | 2 | 611.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 220 | 141450 | 166158 | 2019-01-12 | 01 | 12 | Google Phone/Bose SoundSport Headphones | 2 | 699.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24849 | 319536 | 138588 | 2019-12-16 | 12 | 16 | Macbook Pro Laptop/Wired Headphones | 2 | 1711.99 | 346 Elm St, Seattle, WA 98101 | 346 Elm St | Seattle | WA | 98101 |
| 24870 | 319556 | 217093 | 2019-12-22 | 12 | 22 | Google Phone/Wired Headphones | 2 | 611.99 | 848 7th St, Austin, TX 73301 | 848 7th St | Austin | TX | 73301 |
| 24899 | 319584 | 168610 | 2019-12-11 | 12 | 11 | iPhone/Wired Headphones | 2 | 711.99 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 |
| 24912 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone/Lightning Charging Cable | 2 | 714.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 24948 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor/Lightning Charging Cable | 2 | 394.94 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
7136 rows × 13 columns
# NOW TO CREATE A DATAFRAME SHOWING THE COUNTS OF EACH UNIQUE VALUE IN THE 'all_products' COLUMN IN THE 'multiple_item_orders'
# DATAFRAME ALONG WITH THE TOTALS OF EACH
paired_products = multiple_item_orders.groupby('all_products').agg(Count=('all_products','count'),Sum=('complete_order_total','sum')).reset_index().sort_values('Count',ascending=False)
paired_products
| all_products | Count | Sum | |
|---|---|---|---|
| 342 | iPhone/Lightning Charging Cable | 882 | 631453.00 |
| 173 | Google Phone/USB-C Charging Cable | 856 | 524701.55 |
| 363 | iPhone/Wired Headphones | 361 | 257388.09 |
| 299 | Vareebadd Phone/USB-C Charging Cable | 312 | 128815.20 |
| 191 | Google Phone/Wired Headphones | 303 | 185804.66 |
| ... | ... | ... | ... |
| 197 | LG Dryer/27in FHD Monitor | 1 | 749.99 |
| 196 | LG Dryer/27in 4K Gaming Monitor | 1 | 989.99 |
| 194 | Google Phone/Wired Headphones/USB-C Charging C... | 1 | 1235.93 |
| 193 | Google Phone/Wired Headphones/Macbook Pro Laptop | 1 | 2323.98 |
| 365 | iPhone/iPhone | 1 | 1400.00 |
366 rows × 3 columns
# DOING A QUICK CHECK TO MAKE SURE THE DATA IN THE 'paired_products', 'multiple_item_orders', AND 'sales2019' DATAFRAME
# ARE CORRECT ACCROSS THEM
# FIRST, CHECK THE LAST 10 ROWS OF THE 'paired_products' DATAFRAME
paired_products.tail(10)
| all_products | Count | Sum | |
|---|---|---|---|
| 203 | LG Dryer/Vareebadd Phone | 1 | 1000.00 |
| 202 | LG Dryer/Lightning Charging Cable | 1 | 614.95 |
| 201 | LG Dryer/Google Phone | 1 | 1200.00 |
| 200 | LG Dryer/Flatscreen TV | 1 | 900.00 |
| 199 | LG Dryer/AAA Batteries (4-pack) | 1 | 602.99 |
| 197 | LG Dryer/27in FHD Monitor | 1 | 749.99 |
| 196 | LG Dryer/27in 4K Gaming Monitor | 1 | 989.99 |
| 194 | Google Phone/Wired Headphones/USB-C Charging C... | 1 | 1235.93 |
| 193 | Google Phone/Wired Headphones/Macbook Pro Laptop | 1 | 2323.98 |
| 365 | iPhone/iPhone | 1 | 1400.00 |
# NOW TO QUERY THE ROW WHERE 'all_products' EQUALS 'Google Phone/Wired Headphones/Macbook Pro Laptop' TO MAKE SURE THE SUM
# IS THE SAME FROM THE 'multiple_item_orders' DATAFRAME
multiple_item_orders.query('(all_products=="Google Phone/Wired Headphones/Macbook Pro Laptop")')
| order_id | customer_id | order_date | month | day | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4659 | 227400 | 231842 | 2019-07-29 | 07 | 29 | Google Phone/Wired Headphones/Macbook Pro Laptop | 4 | 2323.98 | 941 Sunset St, Portland, OR 97035 | 941 Sunset St | Portland | OR | 97035 |
# FINALLY, TO QUERY THE 'order_id' FROM THE 'sales_2019' DATAFRAME TO OBSERVE THE BREAKDOWN OF THE 'total_qty_ordered' AND
# 'complete_order_total' COLUMNS
sales_2019.query('(order_id == 227400)')
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4659 | 227400 | 231842 | 2019-07-29 | 07 | 29 | Google Phone | 1 | 600.00 | 600.00 | 941 Sunset St, Portland, OR 97035 | 941 Sunset St | Portland | OR | 97035 |
| 4660 | 227400 | 231842 | 2019-07-29 | 07 | 29 | Wired Headphones | 2 | 11.99 | 23.98 | 941 Sunset St, Portland, OR 97035 | 941 Sunset St | Portland | OR | 97035 |
| 4661 | 227400 | 231842 | 2019-07-29 | 07 | 29 | Macbook Pro Laptop | 1 | 1700.00 | 1700.00 | 941 Sunset St, Portland, OR 97035 | 941 Sunset St | Portland | OR | 97035 |
# CREATING A DATAFRAME FOR THE TOP 10 PAIRED PRODUCTS TO VISUALIZE
top_10_paired_products = paired_products.head(10)
top_10_paired_products
| all_products | Count | Sum | |
|---|---|---|---|
| 342 | iPhone/Lightning Charging Cable | 882 | 631453.00 |
| 173 | Google Phone/USB-C Charging Cable | 856 | 524701.55 |
| 363 | iPhone/Wired Headphones | 361 | 257388.09 |
| 299 | Vareebadd Phone/USB-C Charging Cable | 312 | 128815.20 |
| 191 | Google Phone/Wired Headphones | 303 | 185804.66 |
| 334 | iPhone/Apple Airpods Headphones | 286 | 243250.00 |
| 163 | Google Phone/Bose SoundSport Headphones | 161 | 113298.39 |
| 305 | Vareebadd Phone/Wired Headphones | 104 | 42954.87 |
| 184 | Google Phone/USB-C Charging Cable/Wired Headph... | 77 | 48139.14 |
| 292 | Vareebadd Phone/Bose SoundSport Headphones | 60 | 30199.38 |
top_10_paired_products['Sum'] = top_10_paired_products['Sum'].round()
top_10_paired_products
| all_products | Count | Sum | |
|---|---|---|---|
| 342 | iPhone/Lightning Charging Cable | 882 | 631453.0 |
| 173 | Google Phone/USB-C Charging Cable | 856 | 524702.0 |
| 363 | iPhone/Wired Headphones | 361 | 257388.0 |
| 299 | Vareebadd Phone/USB-C Charging Cable | 312 | 128815.0 |
| 191 | Google Phone/Wired Headphones | 303 | 185805.0 |
| 334 | iPhone/Apple Airpods Headphones | 286 | 243250.0 |
| 163 | Google Phone/Bose SoundSport Headphones | 161 | 113298.0 |
| 305 | Vareebadd Phone/Wired Headphones | 104 | 42955.0 |
| 184 | Google Phone/USB-C Charging Cable/Wired Headph... | 77 | 48139.0 |
| 292 | Vareebadd Phone/Bose SoundSport Headphones | 60 | 30199.0 |
# NOW TO VISUALIZE
top10 = px.bar(top_10_paired_products,x='all_products',y='Count',color='all_products',title='Top 10 Paired Products',labels={'count':'Count','all_products':'Product Pairs'},text='Sum',height=700)
top10.update_traces(textangle=0, textposition='outside')
top10.show()
# CHECKING THE 'sales_2019' DATAFRAME
sales_2019.head()
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | 232266 | 2019-01-22 | 01 | 22 | iPhone | 1 | 700.00 | 700.00 | 944 Walnut St, Boston, MA 02215 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | 113519 | 2019-01-28 | 01 | 28 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 185 Maple St, Portland, OR 97035 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | 168669 | 2019-01-17 | 01 | 17 | Wired Headphones | 2 | 11.99 | 23.98 | 538 Adams St, San Francisco, CA 94016 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 199824 | 2019-01-05 | 01 | 05 | 27in FHD Monitor | 1 | 149.99 | 149.99 | 738 10th St, Los Angeles, CA 90001 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | 144894 | 2019-01-25 | 01 | 25 | Wired Headphones | 1 | 11.99 | 11.99 | 387 10th St, Austin, TX 73301 | 387 10th St | Austin | TX | 73301 |
# CHECKING THE 'multiple_item_orders' DATAFRAME
multiple_item_orders.head()
| order_id | customer_id | order_date | month | day | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones/AA Batteries (4-pack) | 4 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone/Wired Headphones | 2 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 153 | 141384 | 119608 | 2019-01-03 | 01 | 03 | Google Phone/USB-C Charging Cable | 2 | 611.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 220 | 141450 | 166158 | 2019-01-12 | 01 | 12 | Google Phone/Bose SoundSport Headphones | 2 | 699.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
# NOW TO GIVE EACH UNIQUE PRODUCT IN THE 'sales_2019' DATAFRAME AND EACH UNIQUE PRODUCT PAIR IN THE
# 'multiple_item_orders' DATAFRAME ITS OWN UNIQUE 'product_id','product_pair_id', RESPECTIVELY
sales_2019['product_id'] = sales_2019.groupby('product').ngroup()+1
multiple_item_orders['product_pair_id'] = multiple_item_orders.groupby('all_products').ngroup()+1
# CHANGING THE ORDER OF THE COLUMNS IN THE 'sales_2019' AND 'multiple_item_orders' DATAFRAME
# MOVING THE NEWLY CREATED 'product_id' AND 'product_pair_id' COLUMNS AHEAD OF THE PRODUCT AND ALL PRODUCT COLUMNS
sales_2019 = sales_2019[['order_id','customer_id','order_date','month','day','product_id','product','qty_ordered','price_each','order_total','purchase_address','address','city','state','zip_code']]
multiple_item_orders = multiple_item_orders[['order_id','customer_id','order_date','month','day','product_pair_id','all_products','total_qty_ordered','complete_order_total','purchase_address','address','city','state','zip_code']]
# CHECKING THE 'sales_2019' DATAFRAME
sales_2019.head(10)
| order_id | customer_id | order_date | month | day | product_id | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | 232266 | 2019-01-22 | 01 | 22 | 19 | iPhone | 1 | 700.00 | 700.00 | 944 Walnut St, Boston, MA 02215 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | 113519 | 2019-01-28 | 01 | 28 | 13 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 185 Maple St, Portland, OR 97035 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | 168669 | 2019-01-17 | 01 | 17 | 18 | Wired Headphones | 2 | 11.99 | 23.98 | 538 Adams St, San Francisco, CA 94016 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 199824 | 2019-01-05 | 01 | 05 | 3 | 27in FHD Monitor | 1 | 149.99 | 149.99 | 738 10th St, Los Angeles, CA 90001 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | 144894 | 2019-01-25 | 01 | 25 | 18 | Wired Headphones | 1 | 11.99 | 11.99 | 387 10th St, Austin, TX 73301 | 387 10th St | Austin | TX | 73301 |
| 5 | 141239 | 205768 | 2019-01-29 | 01 | 29 | 6 | AAA Batteries (4-pack) | 1 | 2.99 | 2.99 | 775 Willow St, San Francisco, CA 94016 | 775 Willow St | San Francisco | CA | 94016 |
| 6 | 141240 | 237633 | 2019-01-26 | 01 | 26 | 2 | 27in 4K Gaming Monitor | 1 | 389.99 | 389.99 | 979 Park St, Los Angeles, CA 90001 | 979 Park St | Los Angeles | CA | 90001 |
| 7 | 141241 | 112853 | 2019-01-05 | 01 | 05 | 16 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 181 6th St, San Francisco, CA 94016 | 181 6th St | San Francisco | CA | 94016 |
| 8 | 141242 | 220222 | 2019-01-01 | 01 | 01 | 8 | Bose SoundSport Headphones | 1 | 99.99 | 99.99 | 867 Willow St, Los Angeles, CA 90001 | 867 Willow St | Los Angeles | CA | 90001 |
| 9 | 141243 | 187333 | 2019-01-22 | 01 | 22 | 7 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 657 Johnson St, San Francisco, CA 94016 | 657 Johnson St | San Francisco | CA | 94016 |
# CHECKING THE 'multiple_item_orders' DATAFRAME
multiple_item_orders.head(10)
| order_id | customer_id | order_date | month | day | product_pair_id | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | 285 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | 110 | Apple Airpods Headphones/AA Batteries (4-pack) | 4 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | 306 | Vareebadd Phone/Wired Headphones | 2 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 153 | 141384 | 119608 | 2019-01-03 | 01 | 03 | 174 | Google Phone/USB-C Charging Cable | 2 | 611.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 220 | 141450 | 166158 | 2019-01-12 | 01 | 12 | 164 | Google Phone/Bose SoundSport Headphones | 2 | 699.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
| 228 | 141457 | 212963 | 2019-01-09 | 01 | 09 | 335 | iPhone/Apple Airpods Headphones | 2 | 850.00 | 820 Jackson St, Seattle, WA 98101 | 820 Jackson St | Seattle | WA | 98101 |
| 250 | 141478 | 132046 | 2019-01-26 | 01 | 26 | 163 | Google Phone/Apple Airpods Headphones | 2 | 750.00 | 303 North St, Atlanta, GA 30301 | 303 North St | Atlanta | GA | 30301 |
| 323 | 141550 | 146803 | 2019-01-31 | 01 | 31 | 338 | iPhone/Apple Airpods Headphones/Wired Headphones | 4 | 1011.99 | 399 Church St, Boston, MA 02215 | 399 Church St | Boston | MA | 02215 |
| 362 | 141587 | 163327 | 2019-01-29 | 01 | 29 | 275 | USB-C Charging Cable/Apple Airpods Headphones | 2 | 161.95 | 504 14th St, Dallas, TX 75001 | 504 14th St | Dallas | TX | 75001 |
| 421 | 141645 | 237729 | 2019-01-30 | 01 | 30 | 233 | Lightning Charging Cable/Wired Headphones | 2 | 26.94 | 98 Forest St, New York City, NY 10001 | 98 Forest St | New York City | NY | 10001 |